import datetime
import os
import re
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
import openpyxl
import pandas as pd
from bs4 import BeautifulSoup
from config import OUTPUT_DIRS_DEFAULT
from sort_musl_file import sort_musl_file
from spiders import spider


def get_class_details(response):
    """Extract the cell text of every row of the first table in an HTML page.

    Args:
        response: Object whose ``content`` attribute holds the HTML document
            to parse.

    Returns:
        A list with one entry per ``<tr>`` of the first ``<table>``.  Each
        entry is the list of that row's ``<td>`` texts in document order,
        padded with empty strings to at least seven items.  A row without any
        ``<td>`` therefore comes back as seven empty strings.  An empty list
        is returned when the document contains no ``<table>``.
    """
    soup = BeautifulSoup(response.content, "html5lib")
    modules_table = soup.find('table')
    if modules_table is None:
        # Nothing to parse; avoid calling find_all() on None.
        return []

    data = []
    for tr in modules_table.find_all('tr'):
        # Store the full text of each cell.  Tag.find() reports "no match"
        # with None rather than -1, so a special case keyed on ``== -1`` can
        # never trigger; the plain cell text is the value that is needed.
        row = [td.text for td in tr.find_all('td')]
        # Pad short rows so callers can index the first seven columns
        # unconditionally; longer rows keep all of their cells.
        row.extend([''] * (7 - len(row)))
        data.append(row)
    return data

def get_musl_in_homepage():
    """Download the musl compatibility tables and parse each of them.

    Returns:
        dict mapping each table name ('musl-posix', 'musl-c99', 'musl-c11')
        to the rows returned by ``get_class_details`` for that page.  A name
        maps to an empty list when none of its responses had status 200.
    """
    target = {'musl-posix': "https://repo.or.cz/w/musl-tools.git/blob_plain/HEAD:/tab_posix.html",
              'musl-c99': "https://repo.or.cz/w/musl-tools.git/blob_plain/HEAD:/tab_c99.html",
              'musl-c11': "https://repo.or.cz/w/musl-tools.git/blob_plain/HEAD:/tab_c11.html"}
    api_dict = dict()
    for flag, url in target.items():
        # Start from an empty result for every page so that a failed download
        # is never reported with the rows of a previously fetched page.
        classify = list()
        # Submit only this page's task: sharing one growing task list would
        # re-download the earlier pages and make it ambiguous which response
        # belongs to ``flag``.
        # NOTE(review): the meaning of the first argument (20) is defined by
        # ``spider`` and is passed through unchanged -- confirm against it.
        for _, response in spider(20, [(url, flag)]):
            if response.status_code == 200:
                classify = get_class_details(response)
        api_dict[flag] = classify
    return api_dict

def write_excel_musl(api_dict, save_file=None):
    """Write the parsed musl tables to an .xlsx workbook, one sheet per table.

    After saving, the workbook is post-processed in place by ``reset_col``
    and ``sort_musl_file``.

    Args:
        api_dict: Mapping of sheet name to a list of rows.  The first two
            rows of every list are skipped (presumably header rows of the
            scraped table -- TODO confirm).  From each remaining row, item 3
            is written as the symbol, item 2 (expanded by
            ``replace_decl_func``) as the type, item 4 as the header and
            item 0 as the status.
        save_file: Destination path.  When falsy, the workbook is saved as
            ``musl_compatibility.xlsx`` inside a ``<year>-<month>-<day>``
            sub-directory of ``OUTPUT_DIRS_DEFAULT``; missing directories are
            created.
    """
    if not save_file:
        time_now = datetime.datetime.now()
        date_time = "%s-%s-%s" % (time_now.year, time_now.month, time_now.day)
        work_dir = os.path.join(OUTPUT_DIRS_DEFAULT, date_time)
        # makedirs creates every missing level in one call and tolerates a
        # directory that already exists, so no check-then-create race.
        os.makedirs(work_dir, exist_ok=True)
        save_file = os.path.join(work_dir, "musl_compatibility.xlsx")

    titles = ("symbol", "type", "header", "modules/class", "status",
              "symbol_on_openharmony", "remarks")

    wb = openpyxl.Workbook()
    # Drop the sheet a new workbook starts with; only named sheets are kept.
    wb.remove(wb.active)
    for sdk_name, classify_list in api_dict.items():
        sheet = wb.create_sheet(sdk_name)
        for column, title in enumerate(titles, start=1):
            sheet.cell(1, column, title)
        # Data rows start on worksheet row 2, right below the title row.
        for row, single_list in enumerate(classify_list[2:], start=2):
            sheet.cell(row, 1, single_list[3].rstrip())
            sheet.cell(row, 2, replace_decl_func(single_list[2].rstrip()))
            sheet.cell(row, 3, single_list[4].strip())
            sheet.cell(row, 4, 'musl')
            # Every non-breaking space in the status text is replaced by the
            # word 'implemented'.
            sheet.cell(row, 5, single_list[0].replace('\xa0', 'implemented').rstrip())
            sheet.cell(row, 6, '')
            sheet.cell(row, 7, '')
    wb.save(save_file)
    reset_col(save_file)
    sort_musl_file(save_file)

# Symbol-type letter -> description fragment.  Every fragment ends with '&'
# so that the descriptions of consecutive letters come out '&'-separated.
_SYMBOL_TYPE_TABLE = str.maketrans({
    'T': ' symbol is in the text section &',
    'C': " symbol is common (uninitialized data)&",
    'W': ' weak symbol&',
    'V': ' weak object&',
    'R': ' symbol is in the read only data section&',
    'D': ' symbol is in the initialized data section&',
})


def replace_sym_func(str):
    """Expand symbol-type letters into their textual descriptions.

    Each occurrence of 'T', 'C', 'W', 'V', 'R' or 'D' is replaced by its
    description; all other characters are kept.  Surrounding whitespace and
    any trailing run of spaces and '&' characters are removed from the
    result.
    """
    expanded = str.translate(_SYMBOL_TYPE_TABLE)
    trimmed = expanded.strip()
    return trimmed.rstrip(' &')

# Declaration-kind letter -> description fragment (each ends with ' &').
_DECLARATION_KIND_TABLE = str.maketrans({
    'p': 'FUNCTION PROTOTYPE &',
    'd': 'MACRO DEFINITION &',
    'x': 'EXTERNAL AND FORWARD VARIABLE DECLARATION  &',
    't': 'TYPEDEF &',
    's': 'STRUCT DECLARATION &',
    'u': 'UNION DECLARATION &',
})


def replace_decl_func(str):
    """Expand declaration-kind letters into their textual descriptions.

    Repeated characters are first reduced to their first occurrence (order
    preserved).  Then each 'p', 'd', 'x', 't', 's' or 'u' is replaced by its
    description, other characters are kept, and any trailing run of spaces
    and '&' characters is removed.
    """
    # dict keys keep insertion order, giving an order-preserving de-duplication.
    unique_chars = ''.join(dict.fromkeys(str))
    described = unique_chars.translate(_DECLARATION_KIND_TABLE)
    return described.rstrip(' &')

def reset_col(filename):
    """Resize the columns of every sheet in a workbook to fit their content.

    For each sheet the data is read with pandas, empty cells are treated as
    '-', and every column's width is set to the largest encoded byte length
    among its title and its values, plus 2.  The workbook is saved back to
    ``filename``.

    Args:
        filename: Path of the .xlsx workbook to adjust in place.
    """
    def encoded_length(value):
        # Byte length of the encoded text, not the character count.
        return len(str(value).encode())

    workbook = load_workbook(filename)
    for sheet_name in workbook.sheetnames:
        worksheet = workbook[sheet_name]
        frame = pd.read_excel(filename, sheet_name).fillna('-')
        # Append the title row as the last data row so that the column
        # titles take part in the width computation as well.
        frame.loc[len(frame)] = list(frame.columns)
        for position, header in enumerate(frame.columns, start=1):
            column_letter = get_column_letter(position)
            # Largest encoded length found in this column.
            widest = frame[header].apply(encoded_length).max()
            worksheet.column_dimensions[column_letter].width = widest + 2
    workbook.save(filename)

if __name__ == "__main__":
    # Scrape the musl compatibility tables and export them to the default
    # output location.
    write_excel_musl(get_musl_in_homepage())



